Final Project (Due Wednesday, May 10th)

Author
Affiliation

Jerry Cai

University of Pennsylvania

Published

May 10, 2024

Final Project

Jerry Cai

R Code

library(ggplot2)
library(readr)
library(ggdag)

Attaching package: 'ggdag'
The following object is masked from 'package:stats':

    filter
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.3     ✔ stringr   1.5.0
✔ forcats   1.0.0     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks ggdag::filter(), stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyr)
library(dplyr)
library(gt)
library(modelsummary)

Variable names - Codebook format:

Annual total count: annualTotalCount; Software development: softwareDev; Writing and translation: writingTranslation; Clerical and data entry: clericalDataEntry; Creative and multimedia: creativeMultimedia; Professional services: professionalServices; Sales and marketing support: salesMarketingSupport

Data 1 Path (OLI)

# Location of the combined CSV that the rest of the analysis reads from.
# NOTE(review): this is an absolute, machine-specific path; the script will not
# run on another machine without editing it.
input_file_path_combined <- "/Users/jycai/Documents/GitHub/psci3200_jerrycai/combined_data.csv"

# Export step, currently disabled (presumably run once to create the CSV).
# write_csv(combined_data, input_file_path_combined)

# Read the CSV back in; every model below starts from `combined_data`.
combined_data <- read_csv(input_file_path_combined)
Rows: 11357 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): country, indicator
dbl (2): value, year

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Named character vector used as a lookup table: element names are indicator
# codes, values are the short variable names used in this analysis.
# NOTE(review): the value "commExports" is assigned to two different codes
# ("BX.GSR.CMCP.ZS" and "BX.GSR.CCIS.CD"). If this vector is used to rename or
# recode indicators, those two series collapse into a single name. One of them
# presumably needs a distinct name — confirm against the codebook and any
# downstream uses before changing it.
code_to_name <- c(
  "NY.ADJ.NNTY.CD" = "adjIncome",
  "NY.ADJ.NNTY.PC.CD" = "adjIncomeCapita",
  "BX.GSR.CMCP.ZS" = "commExports",
  "BX.GSR.CCIS.ZS" = "serviceICTExportsPercent",
  "BX.GSR.CCIS.CD" = "commExports",  # NOTE(review): same name as "BX.GSR.CMCP.ZS" above
  "BX.GSR.NFSV.CD" = "serviceICTExports",
  "SL.UEM.TOTL.ZS" = "totalUnemployment",
  "SL.EMP.WORK.ZS" = "totalWageSalWorkers",
  "gigEconCount" = "gigEconCount"  # already a short name; maps to itself
)

Model_2_a: adjIncomeCapita~totalDemand

# Model_2_a

# Keep only the two indicators used by this model, for the selected countries
combined_data_2a <- combined_data %>%
  filter(
    indicator %in% c("adjIncomeCapita", "totalDemand"),
    country %in% c(
      "Chile", "Argentina", "Brazil", "Bolivia", "Peru", "Venezuela",
      "Uruguay", "Ecuador", "Mexico", "Panama", "Nicaragua", "Costa Rica",
      "Honduras"
    )
  )

# Spread the indicators into columns (one row per country-year), keep rows
# where both are observed, and drop rows with totalDemand above 2.5e+05
combined_data_2a_wide <- combined_data_2a %>%
  pivot_wider(names_from = indicator, values_from = value) %>%
  filter(
    !is.na(adjIncomeCapita),
    !is.na(totalDemand),
    totalDemand <= 2.5e+05
  )

# Fit the bivariate linear model
regression_model_2_a <- lm(
  adjIncomeCapita ~ totalDemand,
  data = combined_data_2a_wide
)

# Print the coefficient table and fit statistics
summary(regression_model_2_a)

Call:
lm(formula = adjIncomeCapita ~ totalDemand, data = combined_data_2a_wide)

Residuals:
    Min      1Q  Median      3Q     Max 
-5720.1 -3620.5  -930.6  3602.5  9150.9 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 7.254e+03  7.333e+02   9.892 4.63e-14 ***
totalDemand 1.740e-02  2.633e-02   0.661    0.511    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 4401 on 58 degrees of freedom
Multiple R-squared:  0.007473,  Adjusted R-squared:  -0.00964 
F-statistic: 0.4367 on 1 and 58 DF,  p-value: 0.5113
# Base-R diagnostic plots for the fitted model
plot(regression_model_2_a)

# Scatter plot of the outcome against the predictor
library(ggplot2)

scatter_plot_2_a <- ggplot(
  data = combined_data_2a_wide,
  mapping = aes(x = totalDemand, y = adjIncomeCapita)
) +
  geom_point(alpha = 0.6) +  # semi-transparent points so overlaps stay visible
  theme_minimal() +
  labs(
    title = "Effect of gig totalDemand on adjIncomeCapita",
    x = "totalDemand",
    y = "adjIncomeCapita"
  )

# Render the plot
print(scatter_plot_2_a)

Model_2_b: adjIncomeCapita~totalSupply

# Model_2_b

# Keep only the two indicators used by this model.
# NOTE(review): no country filter is applied in this block, so the model is
# fitted on every country present in combined_data — confirm this is intended.
combined_data_2b <- combined_data %>%
  filter(indicator %in% c("adjIncomeCapita", "totalSupply"))

# Spread the indicators into columns (one row per country-year), keep rows
# where both are observed, and drop rows with totalSupply above 1e+06
combined_data_2b_wide <- combined_data_2b %>%
  pivot_wider(names_from = indicator, values_from = value) %>%
  filter(
    !is.na(adjIncomeCapita),
    !is.na(totalSupply),
    totalSupply <= 1e+06
  )

# Fit the bivariate linear model
regression_model_2_b <- lm(
  adjIncomeCapita ~ totalSupply,
  data = combined_data_2b_wide
)

# Print the coefficient table and fit statistics
summary(regression_model_2_b)

Call:
lm(formula = adjIncomeCapita ~ totalSupply, data = combined_data_2b_wide)

Residuals:
   Min     1Q Median     3Q    Max 
-21004 -10683  -7419   3932  57259 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 1.214e+04  8.957e+02  13.559  < 2e-16 ***
totalSupply 1.076e-02  4.145e-03   2.595  0.00976 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 15780 on 453 degrees of freedom
Multiple R-squared:  0.01465,   Adjusted R-squared:  0.01247 
F-statistic: 6.735 on 1 and 453 DF,  p-value: 0.009763
# Base-R diagnostic plots for the fitted model
plot(regression_model_2_b)

# Scatter plot of adjIncomeCapita against totalSupply
library(ggplot2)

scatter_plot_2_b <- ggplot(
  combined_data_2b_wide,
  aes(x = totalSupply, y = adjIncomeCapita)
) +
  geom_point(alpha = 0.6) +  # semi-transparent points so overlaps stay visible
  # Title names the variable actually on the x axis (totalSupply); it
  # previously said totalDemand, carried over from the Model_2_a plot.
  labs(
    title = "Effect of gig totalSupply on adjIncomeCapita",
    x = "totalSupply",
    y = "adjIncomeCapita"
  ) +
  theme_minimal()

# Render the plot
print(scatter_plot_2_b)

Model_2_c: totalDemand ~ serviceICTExports + totalWageSalWorkers

# Model_2_c

# Keep only the three indicators used by this model, for the selected countries
combined_data_2c <- combined_data %>%
  filter(
    indicator %in% c("totalDemand", "serviceICTExports", "totalWageSalWorkers" ),
    country %in% c(
      "Chile", "Argentina", "Brazil", "Bolivia", "Peru", "Venezuela",
      "Uruguay", "Ecuador", "Mexico", "Panama", "Nicaragua", "Costa Rica",
      "Honduras"
    )
  )

# Spread the indicators into columns (one row per country-year), keep rows
# where all three are observed, and drop rows with totalDemand above 1e+06.
# NOTE(review): Model_2_a caps totalDemand at 2.5e+05 instead — confirm the
# two cutoffs are meant to differ.
combined_data_2c_wide <- combined_data_2c %>%
  pivot_wider(names_from = indicator, values_from = value) %>%
  filter(
    !is.na(totalDemand),
    !is.na(serviceICTExports),
    !is.na(totalWageSalWorkers),
    totalDemand <= 1e+06
  )

# Fit the two-predictor linear model
regression_model_2_c <- lm(
  totalDemand ~ serviceICTExports + totalWageSalWorkers,
  data = combined_data_2c_wide
)

# Print the coefficient table and fit statistics
summary(regression_model_2_c)

Call:
lm(formula = totalDemand ~ serviceICTExports + totalWageSalWorkers, 
    data = combined_data_2c_wide)

Residuals:
   Min     1Q Median     3Q    Max 
-46595  -3586   -123   4173  41461 

Coefficients:
                      Estimate Std. Error t value Pr(>|t|)    
(Intercept)          3.456e+03  8.423e+03   0.410    0.683    
serviceICTExports    1.562e-06  1.725e-07   9.053 1.26e-12 ***
totalWageSalWorkers -4.590e+01  1.493e+02  -0.307    0.760    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 13160 on 57 degrees of freedom
Multiple R-squared:  0.6468,    Adjusted R-squared:  0.6344 
F-statistic: 52.18 on 2 and 57 DF,  p-value: 1.318e-13
# Base-R diagnostic plots for the fitted model
plot(regression_model_2_c)

# Scatter plots of totalDemand against each predictor
library(ggplot2)

# totalDemand vs serviceICTExports
scatter_plot_2_c <- ggplot(
  data = combined_data_2c_wide,
  mapping = aes(x = serviceICTExports, y = totalDemand)
) +
  geom_point(alpha = 0.6) +  # semi-transparent points so overlaps stay visible
  theme_minimal() +
  labs(
    title = "Effect of serviceICTExports on gig economy totalDemand",
    x = "serviceICTExports",
    y = "totalDemand"
  )

# Render the first plot
print(scatter_plot_2_c)

# totalDemand vs totalWageSalWorkers
scatter_plot_2_c_i <- ggplot(
  data = combined_data_2c_wide,
  mapping = aes(x = totalWageSalWorkers, y = totalDemand)
) +
  geom_point(alpha = 0.6) +  # semi-transparent points so overlaps stay visible
  theme_minimal() +
  labs(
    title = "Effect of totalWageSalWorkers on gig economy totalDemand",
    x = "totalWageSalWorkers",
    y = "totalDemand"
  )

# Render the second plot
print(scatter_plot_2_c_i)

Model_2_d: totalSupply ~ serviceICTExports + totalUnemployment

# Model_2_d

# Keep only the three indicators used by this model, for the selected countries
combined_data_2d <- combined_data %>%
  filter(
    indicator %in% c("totalSupply", "serviceICTExports", "totalUnemployment" ),
    country %in% c(
      "Chile", "Argentina", "Brazil", "Bolivia", "Peru", "Venezuela",
      "Uruguay", "Ecuador", "Mexico", "Panama", "Nicaragua", "Costa Rica",
      "Honduras"
    )
  )

# Spread the indicators into columns (one row per country-year), keep rows
# where all three are observed, and drop rows with totalSupply above 1e+06
combined_data_2d_wide <- combined_data_2d %>%
  pivot_wider(names_from = indicator, values_from = value) %>%
  filter(
    !is.na(totalSupply),
    !is.na(serviceICTExports),
    !is.na(totalUnemployment),
    totalSupply <= 1e+06
  )

# Fit the two-predictor linear model
regression_model_2_d <- lm(
  totalSupply ~ serviceICTExports + totalUnemployment,
  data = combined_data_2d_wide
)

# Print the coefficient table and fit statistics
summary(regression_model_2_d)

Call:
lm(formula = totalSupply ~ serviceICTExports + totalUnemployment, 
    data = combined_data_2d_wide)

Residuals:
    Min      1Q  Median      3Q     Max 
-209372  -53819  -26412   30463  469295 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)       -1.636e+04  4.996e+04  -0.328 0.744831    
serviceICTExports  6.720e-06  1.781e-06   3.773 0.000488 ***
totalUnemployment  9.278e+03  6.117e+03   1.517 0.136676    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 137800 on 43 degrees of freedom
Multiple R-squared:  0.3038,    Adjusted R-squared:  0.2715 
F-statistic: 9.384 on 2 and 43 DF,  p-value: 0.0004152
# Base-R diagnostic plots for the fitted model
plot(regression_model_2_d)

# Scatter plots of totalSupply against each predictor
library(ggplot2)

# totalSupply vs serviceICTExports.
# Title names the plotted outcome (totalSupply); it previously said
# totalDemand, carried over from the Model_2_c plot.
scatter_plot_2_d <- ggplot(
  combined_data_2d_wide,
  aes(x = serviceICTExports, y = totalSupply)
) +
  geom_point(alpha = 0.6) +  # semi-transparent points so overlaps stay visible
  labs(
    title = "Effect of serviceICTExports on gig economy totalSupply",
    x = "serviceICTExports",
    y = "totalSupply"
  ) +
  theme_minimal()

# Render the first plot
print(scatter_plot_2_d)

# totalSupply vs totalUnemployment.
# Title names the plotted variables; it previously referred to
# totalWageSalWorkers and totalDemand, neither of which is on this plot.
scatter_plot_2_d_i <- ggplot(
  combined_data_2d_wide,
  aes(x = totalUnemployment, y = totalSupply)
) +
  geom_point(alpha = 0.6) +  # semi-transparent points so overlaps stay visible
  labs(
    title = "Effect of totalUnemployment on gig economy totalSupply",
    x = "totalUnemployment",
    y = "totalSupply"
  ) +
  theme_minimal()

# Render the second plot
print(scatter_plot_2_d_i)